Read in and clean raw DO data.

Start testing rules:

min.points = 2
slope.thresh = -0.006
do.thresh = 2
high.do = 14
time.thresh = 4
fast = 5.5 
bpfit = 0.1
high.slope.thresh = -0.04 
time.same = 7
break.toggle = 1.4
conc.range = 1.4
plot.toggle = TRUE

Fit a linear model to data with chosen rules

Remove Outlier Samples

slope.new <- respiration %>%
   separate(Sample_Name, c("ECA", "kit", "rep"), sep = "_", remove = FALSE) %>%
  mutate(Treat = case_when(grepl("W",rep)~"Wet",
                           grepl("D", rep) ~"Dry")) %>%
  unite(kit_treat, kit, Treat, remove = FALSE) %>%
  group_by(kit_treat) %>%
  mutate(Mean_Slope_All = mean(slope_of_the_regression)) %>%
   mutate(cv_before_removal = (abs(sd(slope_of_the_regression)/mean(slope_of_the_regression)))*100) %>%
ungroup() %>%
    dplyr::select(c(Sample_Name, kit_treat, kit,rep,slope_of_the_regression,rate_mg_per_L_per_min,rate_mg_per_L_per_h, Treat, Mean_Slope_All, cv_before_removal))

 slope.new$flag <- NA

 slope.final <- as.data.frame(matrix(NA, ncol = 13, nrow =1))

 colnames(slope.final) = c("slope.temp","Sample_Name", "kit_treat", "kit", "rep", "rate_mg_per_L_per_min","rate_mg_per_L_per_h", "Treat", "Mean_Slope_All","cv_before_removal", "cv_after_removal", "Mean_Slope_Removed","flag")

unique.samples = unique(slope.new$kit_treat)

for (i in 1:length(unique.samples)) {

  data_subset = subset(slope.new, slope.new$kit_treat == unique.samples[i])

  slope.temp = as.numeric(data_subset$slope_of_the_regression)

  slope.temp.sd <- sd(slope.temp)
  slope.temp.mean <- mean(slope.temp)
  CV = abs((slope.temp.sd/slope.temp.mean)*100)

  #looping to get 4 out of 5 best samples
  for (sample.reduction in 1:5)  {

    if (slope.temp.mean == 0) {

      CV = 0

    }

    else if (length(slope.temp) > 4 & CV >= 10) {

      dist.temp = as.matrix(abs(dist(slope.temp)))
      dist.comp = numeric()

      for(slope.now in 1:ncol(dist.temp)) {

        dist.comp = rbind(dist.comp,c(slope.now,sum(dist.temp[,slope.now])))

      }

      dist.comp[,2] = as.numeric(dist.comp[,2])
      slope.temp = slope.temp[-which.max(dist.comp[,2])]

      slope.temp.sd <- sd(slope.temp)
      slope.temp.mean <- mean(slope.temp)
      slope.temp.cv <- abs((slope.temp.sd/slope.temp.mean)*100)
      CV = slope.temp.cv
      slope.temp.range <- max(slope.temp) - min(slope.temp)
      range = slope.temp.range

      }
  }

  if (length(slope.temp) >= 3) {

    slope.combined <- as.data.frame(slope.temp)

    slope.removed <- merge(slope.combined, data_subset, by.x = "slope.temp", by.y = "slope_of_the_regression", all.x = TRUE)

    slope.removed <- slope.removed[!duplicated(slope.removed$Sample_Name), ]

    slope.removed$cv_after_removal = as.numeric(abs((sd(slope.temp)/mean(slope.temp))*100))

    slope.removed$Mean_Slope_Removed = as.numeric(mean(slope.temp))

  }

  slope.final = rbind(slope.removed, slope.final)

  rm('slope.temp')
}

## This data frame has removed samples
slope.final$flag <- ifelse(slope.final$cv_before_removal < slope.final$cv_after_removal, "Issue in dropping sample", NA)

slope.final <- rename(slope.final, "slope_of_the_regression" = "slope.temp")

slope.final$rem <- abs(slope.final$slope_of_the_regression) - slope.final$rate_mg_per_L_per_min

slope.final <- slope.final[!is.na(slope.final$Sample_Name),]

Calculate Effect Size/Make Histograms of data with no removals

data_clean <- slope.new %>% 
  mutate(slope_of_the_regression = if_else(slope_of_the_regression>0,0,slope_of_the_regression)) %>%
  mutate(rate_mg = if_else(slope_of_the_regression>=0,0,rate_mg_per_L_per_min)) %>% 
  mutate(Mean_Slope_All = if_else(Mean_Slope_All >0,0,Mean_Slope_All)) %>% 
  mutate(Mean_Slope = abs(Mean_Slope_All))

wet_no <- ggplot(subset(data_clean, Treat %in% "Wet"), aes(x = rate_mg_per_L_per_min)) +
  geom_histogram()+
  ggtitle("Wet Rates, No Removals")+
  theme(strip.text = element_text(
    size = 4))

dry_no <- ggplot(subset(data_clean, Treat %in% "Dry"), aes(x = rate_mg_per_L_per_min)) +
  geom_histogram()+
  ggtitle("Dry Rates, No Removals")+
  theme(strip.text = element_text(
    size = 4))

data_clean_effect <- data_clean %>% 
  filter(!grepl(011, kit)) %>% 
  filter(!grepl(012, kit)) %>% 
  distinct(kit_treat, .keep_all = TRUE) %>% 
  group_by(kit) %>% 
  mutate(effect = (Mean_Slope[Treat == "Wet"] - Mean_Slope[Treat == "Dry"])) %>% 
  mutate(log_effect = log10(abs(effect+1))) %>% 
  distinct(kit, .keep_all = TRUE)
 
effect_no <- ggplot(data_clean_effect, aes(x = effect)) + 
  geom_histogram() +
  ggtitle("Effect Size, No Removals")+
  theme(strip.text = element_text(
    size = 4))
data_clean_removals = slope.final %>%
  mutate(slope_of_the_regression = if_else(slope_of_the_regression>0,0,slope_of_the_regression)) %>%
  mutate(rate_mg = if_else(slope_of_the_regression>=0,0,rate_mg_per_L_per_min)) %>%
  mutate(Mean_Slope_Removed = if_else(Mean_Slope_Removed>0,0,Mean_Slope_Removed)) %>%
  dplyr::select(c(Sample_Name,rate_mg, Mean_Slope_Removed)) %>%
  rename(rate_mg_per_L_per_min = rate_mg) %>%
  mutate(rate_mg_per_L_per_h = rate_mg_per_L_per_min*60) %>%
  separate(Sample_Name, into = c("EC", "kit", "INC"), remove = FALSE) %>%
  separate(Sample_Name, into = c("ID", "rep"), sep = "-", remove = FALSE) %>%
  mutate(Treat = case_when(grepl("W",rep)~"Wet",
                           grepl("D", rep) ~"Dry")) %>%
  unite(kit_treat, c("kit", "Treat"),  sep = "_", remove = FALSE)
## Warning: Expected 3 pieces. Additional pieces discarded in 452 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
wet_rem <- ggplot(subset(data_clean_removals, Treat %in% "Wet"), aes(x = rate_mg_per_L_per_min)) +
  geom_histogram()+
  ggtitle("Wet Rates, Removals")+
  theme(strip.text = element_text(
    size = 4))

dry_rem <- ggplot(subset(data_clean_removals, Treat %in% "Dry"), aes(x = rate_mg_per_L_per_min)) +
  geom_histogram()+
  ggtitle("Dry Rates, Removals")+
  theme(strip.text = element_text(
    size = 4))

data_clean_rem_effect <- data_clean_removals %>% 
  filter(!grepl(011, kit)) %>% 
  filter(!grepl(012, kit)) %>% 
  mutate(Mean_Slope = abs(Mean_Slope_Removed)) %>% 
  distinct(kit_treat, .keep_all = TRUE) %>% 
  group_by(kit) %>% 
  mutate(effect = (Mean_Slope[Treat == "Wet"] - Mean_Slope[Treat == "Dry"])) %>% 
  mutate(log_effect = log10(abs(effect+1))) %>% 
  distinct(kit, .keep_all = TRUE)
 
effect_rem <- ggplot(data_clean_rem_effect, aes(x = effect)) + 
  geom_histogram() +
  ggtitle("Effect Size, No Removals")+
  theme(strip.text = element_text(
    size = 4))

all <- (wet_no + wet_rem + dry_no + dry_rem + effect_no + effect_rem) +
  plot_layout(widths = c(3,3))

print(all)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

effect.path <- paste0("C:/Users/",pnnl.user,"/PNNL/Core Richland and Sequim Lab-Field Team - Documents/Data Generation and Files/ECA/Optode multi reactor/Optode_multi_reactor_incubation/effect size/Effect_Size_Data/")



#write.csv(data_clean_rem_effect, paste0(effect.path,"Effect_Size_merged_by_", pnnl.user,"_on_",Sys.Date(),".csv"), row.names = F)

respiration.removals.path <- paste0("C:/Users/",pnnl.user,"/PNNL/Core Richland and Sequim Lab-Field Team - Documents/Data Generation and Files/ECA/Optode multi reactor/Optode_multi_reactor_incubation/rates/Plots/All_Respiration_Rates/")

#write.csv(data_clean_removals, paste0(respiration.removals.path,"removed_respiration_merged_by_", pnnl.user,"_on_",Sys.Date(),".csv"), row.names = F)